{
 "cells": [
  {
   "cell_type": "markdown",
   "id": "74ae54e0-1b18-4389-b50d-29eb208bf79f",
   "metadata": {},
   "source": [
    "# Query result content\n",
    "\n",
    "Queries return a `Result` object that we can customize according to our needs."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 1,
   "id": "dd20a65d-bc10-404e-bb12-4be0af0cbfde",
   "metadata": {},
   "outputs": [],
   "source": [
    "%pip install superlinked==37.5.0"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "id": "023e8d69-be05-4ac8-9513-d9dcacb0193a",
   "metadata": {},
   "outputs": [],
   "source": [
    "import math\n",
    "\n",
    "import pandas as pd\n",
    "from superlinked import framework as sl\n",
    "\n",
    "pd.set_option(\"display.max_colwidth\", 100)"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "6bc9bfa7-5302-4a93-9f16-ce54113aea37",
   "metadata": {},
   "source": [
    "Let's create a simple config and ingest two entities of text just for demonstration purposes."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "id": "3a35be4b-c014-49ce-a058-871c6ad35fc7",
   "metadata": {},
   "outputs": [],
   "source": [
    "class Paragraph(sl.Schema):\n",
    "    id: sl.IdField\n",
    "    body: sl.String\n",
    "    like_count: sl.Integer\n",
    "\n",
    "\n",
    "paragraph = Paragraph()\n",
    "\n",
    "body_space = sl.TextSimilaritySpace(text=paragraph.body, model=\"sentence-transformers/all-mpnet-base-v2\")\n",
    "like_space = sl.NumberSpace(number=paragraph.like_count, min_value=0, max_value=100, mode=sl.Mode.MAXIMUM)\n",
    "# indices can be built on top of multiple spaces as simple as that\n",
    "paragraph_index = sl.Index([body_space, like_space], fields=paragraph.like_count)\n",
    "\n",
    "source: sl.InMemorySource = sl.InMemorySource(paragraph)\n",
    "executor = sl.InMemoryExecutor(sources=[source], indices=[paragraph_index])\n",
    "app = executor.run()\n",
    "\n",
    "source.put(\n",
    "    [\n",
    "        {\n",
    "            \"id\": \"paragraph-1\",\n",
    "            \"body\": \"Glorious animals live in the wilderness.\",\n",
    "            \"like_count\": 75,\n",
    "        },\n",
    "        {\n",
    "            \"id\": \"paragraph-2\",\n",
    "            \"body\": \"Growing computation power enables advancements in AI.\",\n",
    "            \"like_count\": 10,\n",
    "        },\n",
    "    ]\n",
    ")\n",
    "\n",
    "query_base = (\n",
    "    sl.Query(\n",
    "        paragraph_index,\n",
    "        weights={\n",
    "            body_space: 1.0,\n",
    "            like_space: 1.0,\n",
    "        },\n",
    "    )\n",
    "    .find(paragraph)\n",
    "    .similar(body_space, \"What makes the AI industry go forward?\")\n",
    "    .filter(paragraph.like_count > 15)\n",
    "    .include_metadata()  # this clause makes the result entries contain metadata, like partial scores\n",
    ")"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "e29036e6-7144-4fe5-bc1e-a287742cf205",
   "metadata": {},
   "source": [
    "## General result structure\n",
    "\n",
    "A `Result` consists of entries"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "id": "c986b550-30e4-47f3-a5ad-831220488bf0",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "[ResultEntry(id='paragraph-1', fields={}, metadata=ResultEntryMetadata(score=0.4417109896633091, partial_scores=[-0.020228776592334182, 0.46193976625564326], vector_parts=[]))]"
      ]
     },
     "execution_count": 4,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "result = app.query(query_base)\n",
    "\n",
    "result.entries"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "7438fc51-03e8-4807-b450-55924eed1437",
   "metadata": {},
   "source": [
    "and entries have metadata, containing the score for example"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "id": "5536a126-b3ee-452c-80ef-0513afc12f3c",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "0.4417109896633091"
      ]
     },
     "execution_count": 5,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "result.entries[0].metadata.score"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "19ed8f9a-2303-4be3-8217-8b8fb03655f5",
   "metadata": {},
   "source": [
    "and partial scores - a breakdown of the score per space in the index."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "id": "597dfbff-ea00-4a86-949f-ad1bd7a601dd",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "[-0.020228776592334182, 0.46193976625564326]"
      ]
     },
     "execution_count": 6,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "result.entries[0].metadata.partial_scores"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "5d75eb6c-de33-4ba5-a2a8-c38560ac43c1",
   "metadata": {},
   "source": [
    "these naturally add up to the score itself..."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "id": "f279821d-ebdd-41b9-848e-27779b33e4cb",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "True"
      ]
     },
     "execution_count": 7,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# compare with a tolerance: a float sum can differ from the stored score by rounding error\n",
    "math.isclose(sum(result.entries[0].metadata.partial_scores), result.entries[0].metadata.score)"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "8920dc40-0aca-4522-8743-8bb9397cdf69",
   "metadata": {},
   "source": [
    "...and refer to the spaces in the order those were supplied at index creation."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 8,
   "id": "5529cd60-2cf7-43fd-aac2-c938637dbf10",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "{'body_space': -0.020228776592334182, 'like_space': 0.46193976625564326}"
      ]
     },
     "execution_count": 8,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "partial_scores_dict = dict(zip([\"body_space\", \"like_space\"], result.entries[0].metadata.partial_scores))\n",
    "partial_scores_dict"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "ac94d3a3-a09b-4f84-8454-6804ddab78a7",
   "metadata": {},
   "source": [
    "### Display results"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "fa8653de-5bb7-4d92-bd1d-85d66ed049a3",
   "metadata": {},
   "source": [
    "A nice way to show results in a `pandas DataFrame` is the following:"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 9,
   "id": "674e926b-04bf-4b44-8138-61dbc194acac",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>id</th>\n",
       "      <th>similarity_score</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>paragraph-1</td>\n",
       "      <td>0.441711</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "            id  similarity_score\n",
       "0  paragraph-1          0.441711"
      ]
     },
     "execution_count": 9,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "sl.PandasConverter.to_pandas(result)"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "22768830-0ba0-4f8c-969e-eb4b93522319",
   "metadata": {},
   "source": [
    "and we can add the partial scores to the dataframe."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 10,
   "id": "1b9f0d60-9fe8-4970-9bd5-43abc921e827",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>id</th>\n",
       "      <th>similarity_score</th>\n",
       "      <th>body_space</th>\n",
       "      <th>category_space</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>paragraph-1</td>\n",
       "      <td>0.441711</td>\n",
       "      <td>-0.020229</td>\n",
       "      <td>0.46194</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "            id  similarity_score  body_space  category_space\n",
       "0  paragraph-1          0.441711   -0.020229         0.46194"
      ]
     },
     "execution_count": 10,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "def get_partial_score_df(query_result: sl.QueryResult) -> pd.DataFrame:\n",
    "    partial_score_df: pd.DataFrame = pd.DataFrame(\n",
    "        [[entry.id] + list(entry.metadata.partial_scores) for entry in query_result.entries],\n",
    "        columns=[\"id\", \"body_space\", \"category_space\"],\n",
    "    )\n",
    "    return sl.PandasConverter.to_pandas(query_result).merge(partial_score_df, on=\"id\")\n",
    "\n",
    "\n",
    "get_partial_score_df(result)"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "a77ac74f-81e9-4547-bfc5-056143cd6d98",
   "metadata": {},
   "source": [
    "### Result metadata\n",
    "\n",
    "Result metadata contains several useful things, like the vector the KNN search was carried out with in the VDB,"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 11,
   "id": "892f82fa-2727-4c92-94fc-f06bd1d93f4b",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "[0.018769700726501557,\n",
       " 0.03672832746481938,\n",
       " -0.0372933507748233,\n",
       " -0.03523925553426185,\n",
       " -0.023609617164574218]"
      ]
     },
     "execution_count": 11,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "result.metadata.search_vector[:5]"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "f6ed7d38-994f-493e-9b61-ab9a60ecf80a",
   "metadata": {},
   "source": [
    "and contains the name of the schema the results come from"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 12,
   "id": "5dc80ab8-d559-4e47-8a79-e607b48bde5a",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "'Paragraph'"
      ]
     },
     "execution_count": 12,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "result.metadata.schema_name"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "4703c70c-293f-4d1e-98e2-5561ffeb74f9",
   "metadata": {},
   "source": [
    "The actual inputs to the query are also accessible. Many of these parameters are implicit, hence the generated names. You can always make them explicit, thereby giving them names.\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 13,
   "id": "5f3996ba-2c02-4216-9a23-3caac04c3452",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "{'similar_filter_TextSimilaritySpace_e74d_Paragraph_body_value_param__': 'What makes the AI industry go forward?',\n",
       " 'similar_filter_TextSimilaritySpace_e74d_Paragraph_body_weight_param__': 1.0,\n",
       " 'hard_filter_like_count_be_greater_than_param__': 15,\n",
       " 'limit_param__': -1,\n",
       " 'radius_param__': None,\n",
       " 'select_param__': [],\n",
       " 'space_weight_TextSimilaritySpace_e74d_param__': 1.0,\n",
       " 'space_weight_NumberSpace_b3ce_param__': 1.0}"
      ]
     },
     "execution_count": 13,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "result.metadata.search_params"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "aedbbbfb-07e6-4eab-8d53-afc9bdb00fc1",
   "metadata": {},
   "source": [
    "Notice, for example, that `'hard_filter_like_count_be_greater_than_param__': 15` describes the effect of the `.filter` clause."
   ]
  },
  {
   "cell_type": "markdown",
   "id": "5c24a1ee-4e0f-482d-b04a-843fb6f9a3f7",
   "metadata": {},
   "source": [
    "### Configure what fields are returned in the Result"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "5be869b4-6e87-4417-aae3-0a7ca5adb548",
   "metadata": {},
   "source": [
    "#### .select_all()\n",
    "\n",
    "This clause makes the query result return all the fields the entities have."
   ]
  },
  {
   "cell_type": "markdown",
   "id": "e7423d2d-d767-4313-849c-7b8cef190655",
   "metadata": {},
   "source": [
    "A query without any select clause will return none of the fields."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 14,
   "id": "acd32fbb-65ef-48a3-9dbf-628e3e37d22e",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "{}"
      ]
     },
     "execution_count": 14,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "result_no_select_clause = app.query(query_base)\n",
    "result_no_select_clause.entries[0].fields"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "7693f194-3a55-403a-9842-6d7c8f9d591d",
   "metadata": {},
   "source": [
    "Notice that both fields of `paragraph` (`body` and `like_count`) are returned with the `.select_all()` clause."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 15,
   "id": "5f0151de-9a06-4343-8741-931b2ddb3ba8",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "{'body': 'Glorious animals live in the wilderness.', 'like_count': 75}"
      ]
     },
     "execution_count": 15,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "query_select_all = query_base.select_all()\n",
    "result_select_all = app.query(query_select_all)\n",
    "result_select_all.entries[0].fields"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "6339438c-6e22-4fe6-8c93-80c38bc094b9",
   "metadata": {},
   "source": [
    "#### .select()\n",
    "\n",
    "The select clause enables fine-grained field selection."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 16,
   "id": "821ae844-962a-4502-bbfe-c86394e5c87e",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "{'body': 'Glorious animals live in the wilderness.'}"
      ]
     },
     "execution_count": 16,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "query_fix_select = query_base.select(paragraph.body)\n",
    "result_fix_select = app.query(query_fix_select)\n",
    "result_fix_select.entries[0].fields"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "611d410d-a883-403c-931e-b8d0d8e403f8",
   "metadata": {},
   "source": [
    "It can also be used with an `sl.Param`, filled at query time."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 17,
   "id": "d4ac4465-3d94-4306-939d-f88b9f6e8c12",
   "metadata": {},
   "outputs": [],
   "source": [
    "query_select = query_base.select(sl.Param(\"select_fields\"))"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "c12e9ca3-027c-4b2d-8bb3-ead72fad8eac",
   "metadata": {},
   "source": [
    "Fields can be referenced via string names,"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 18,
   "id": "342fd293-3ff4-42c1-b390-7ee56df942dc",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "{'body': 'Glorious animals live in the wilderness.'}"
      ]
     },
     "execution_count": 18,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "result_body = app.query(query_select, select_fields=[\"body\"])\n",
    "result_body.entries[0].fields"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "96126581-d3fd-4b84-b553-646dfc84d6f5",
   "metadata": {},
   "source": [
    "or referring to the schema field itself."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 19,
   "id": "aec526e2-37e7-4b4f-82ed-6a5431cb2354",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "{'like_count': 75}"
      ]
     },
     "execution_count": 19,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "result_like = app.query(query_select, select_fields=[paragraph.like_count])\n",
    "result_like.entries[0].fields"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "3a85b4b7-9ffe-44d2-87a7-9cd15b57b681",
   "metadata": {},
   "source": [
    "An empty select clause results in no fields being returned, for efficiency (ids are always returned, of course),"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 20,
   "id": "486a8c50-1cb7-492b-9be7-dd85fed72aea",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "{}"
      ]
     },
     "execution_count": 20,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "result_empty = app.query(query_select)\n",
    "result_empty.entries[0].fields"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "ab72bb97-5992-4778-88be-c0d0cad24b35",
   "metadata": {},
   "source": [
    "and referencing ways can even be mixed in the same clause."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 21,
   "id": "8877dc2d-de41-4384-a7ff-04a9ed60e8f4",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "{'body': 'Glorious animals live in the wilderness.', 'like_count': 75}"
      ]
     },
     "execution_count": 21,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "result_both = app.query(query_select, select_fields=[paragraph.body, \"like_count\"])\n",
    "result_both.entries[0].fields"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "superlinked-py3.11",
   "language": "python",
   "name": "superlinked-py3.11"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.11.5"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 5
}
